########################################################################################################################################
## ECONOMIC SHOCKS & ISSUE IMPORTANCE                                                                                                 ##
## Author:  Sarah Gomm, based on Hanretty et al. 2020 https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/0EIQXL ##
## Date:    February 2023                                                                                                             ##
## Data:    SEP Wave 6                                                                                                                ##
########################################################################################################################################


rm(list=ls())
# Check the username and set the working directory   ---------------------------
# Get the username 
username <- Sys.getenv("USERNAME")

if (username == "FILL IN YOUR USERNAME") {
  setwd("FILL IN YOUR WORKING DIRECTORY")
} else {
  stop("Unknown user. Please check your username.")
}

# Load Stan
library(rstan)
library(tidyverse)

###############
# Select dataset of interest

#load("Surveydata_envhigh.Rdata")
#load("Surveydata_envlow.Rdata")
#load("Surveydata_inchigh.Rdata")
#load("Surveydata_inclow.Rdata")

N_items <- 16
N_respondents <- nrow(svyclean)

#Recode covariates
svyclean$weight <- svyclean$weight_ww
svyclean$weight[is.na(svyclean$weight)] <- 0 


## estimate all covariate models in a loop
useWeights <- TRUE # use survey weights via quasi-likelihood?

for (useModel in c("treatment")){

  ####################
  ### Set up data for conjoint model
  
  # check that all menu items have same levels
  apply(sapply(conjoint_menus,levels),1,unique)
  
  # create lookup table for conjoint menu items
  conjoint_menu_levels <- data.frame(
    item = rep(1:N_items,each=4),
    level = rep(1:4,times=N_items),
    levels = levels(conjoint_menus$Manifesto_2_2_A_t1)
  )
  
  # convert conjoint menu data into array
  conjoint_menus_numeric_wide <- sapply(conjoint_menus,as.numeric)
  
  conjoint_menus_itemIDs <- apply(conjoint_menus_numeric_wide,2,function(x) conjoint_menu_levels$item[x])
  conjoint_menus_levelIDs <- apply(conjoint_menus_numeric_wide,2,function(x) conjoint_menu_levels$level[x])
  
  conjoint_menus_itemIDs_array <- array(conjoint_menus_itemIDs,
                                        dim=c(N_respondents,2,3,4),
                                        dimnames=list(Respondent=1:N_respondents,Alternative=c("A","B"),MenuPosition=1:3,Round=1:4))
  
  conjoint_menus_levelIDs_array <- array(conjoint_menus_levelIDs,
                                         dim=c(N_respondents,2,3,4),
                                         dimnames=list(Respondent=1:N_respondents,Alternative=c("A","B"),MenuPosition=1:3,Round=1:4))
  
  # lookup respondents positions for each item
  ordinal_responses_numeric <- sapply(ordinal_responses,as.numeric)
  conjoint_menus_positionIDs_array <- array(NA,
                                            dim=c(N_respondents,2,3,4),
                                            dimnames=list(Respondent=1:N_respondents,Alternative=c("A","B"),MenuPosition=1:3,Round=1:4))
  for (i in 1:N_respondents) for (j in 1:3) {
    conjoint_menus_positionIDs_array[i,,j,] <- 
      ordinal_responses_numeric[i,conjoint_menus_itemIDs_array[i,1,j,1]]
    if (is.na(conjoint_menus_positionIDs_array[i,1,j,1] )) {
      print(paste("Linking error at respondent",i,"menu item",j))
    }    
  }
  
  conjoint_responses_wide <- sapply(conjoint_responses,as.numeric) - 1
  
  
  # number of differing positions for each conjoint comparison, to allow estimation of different response thresholds
  
  differing_positions_count <- apply(conjoint_menus_levelIDs_array,c(1,4),function(x) sum(x[1,] != x[2,]))
  
  
  # set up covariates for respondent-variation in the weights on different issues
  
  heterogeneity_formula <- 
    switch(useModel,
           treatment = as.formula("~as.numeric(work_worse)+0"))
  
  respondent_covariates <- model.matrix(heterogeneity_formula,data=svyclean)
  
  N_covariates <- ncol(respondent_covariates)
  
  conjoint_scaling_data <- list(
    Y = conjoint_responses_wide,
    W = (if (useWeights) svyclean$weight else rep(1,nrow(conjoint_responses_wide))),
    X = respondent_covariates,
    D = differing_positions_count, #1,2, or 3 differing positions
    itemID = conjoint_menus_itemIDs_array,
    candidatepositionID = conjoint_menus_levelIDs_array,
    respondentpositionID = conjoint_menus_positionIDs_array,
    ordinal_responses = replace(ordinal_responses_numeric,is.na(ordinal_responses_numeric),0),
    N_menulength = 3,
    N_itemalts = 4,
    N_items = N_items,
    N_rounds = ncol(conjoint_responses_wide),
    N_respondents = nrow(conjoint_responses_wide),
    N_covariates = N_covariates
  )
  
  ###############
  # Stan code for conjoint analysis
  
  conjoint_code <- '

data {

    int<lower = 1> N_respondents; // Number of respondents 
    int<lower = 1> N_rounds; // Number of rounds per respondent 
    int<lower = 1> N_itemalts; // Number of policy alternatives 
    int<lower = 1> N_menulength; // Number of issues per conjoint 
    int<lower = 1> N_items; // Number of issues 
    int<lower = 1> N_covariates; // Number of respondent-level covariates 

    int<lower = 1,upper=2> Y[N_respondents,N_rounds]; // Matrix of conjoint response data
    real<lower = 0> W[N_respondents]; // Population weights
    int<lower = 1,upper=3> D[N_respondents,N_rounds]; // How many differing positions were there for each conjoint comparison?
    int<lower = 1,upper=N_items> itemID[N_respondents,2,N_menulength,N_rounds];
    int<lower = 1,upper=N_itemalts> candidatepositionID[N_respondents,2,N_menulength,N_rounds];
    int<lower = 1,upper=N_itemalts> respondentpositionID[N_respondents,2,N_menulength,N_rounds];

    int<lower = 0,upper=4> ordinal_responses[N_respondents,N_items];

    matrix[N_respondents,N_covariates] X;

}

parameters {

    vector<lower=0>[N_itemalts-1] psi_std[N_items]; // positions of alternatives on each issue
    real gamma[3]; // cutpoints for choosing A vs neutral vs B

    simplex[4] pi[N_items]; // estimated population proportions for calculating importance score

    vector[N_covariates] beta[N_items]; // covariate coefficients for item weights

}

transformed parameters {

    vector[N_itemalts] psi[N_items]; // positions of alternatives on each issue
    real mu[N_respondents,N_rounds,2]; // utility of A, utility of B
    real delta[N_respondents,N_rounds]; // utility difference

    for (j in 1:N_items) psi[j] = cumulative_sum(append_row(0.0, psi_std[j])); 

    for (i in 1:N_respondents){
        for (k in 1:N_rounds){
            mu[i,k,1] = -1*(
                        fabs(exp(dot_product(beta[itemID[i,1,1,k]],X[i]))*(psi[itemID[i,1,1,k],candidatepositionID[i,1,1,k]] - psi[itemID[i,1,1,k],respondentpositionID[i,1,1,k]])) + 
                        fabs(exp(dot_product(beta[itemID[i,1,2,k]],X[i]))*(psi[itemID[i,1,2,k],candidatepositionID[i,1,2,k]] - psi[itemID[i,1,2,k],respondentpositionID[i,1,2,k]])) + 
                        fabs(exp(dot_product(beta[itemID[i,1,3,k]],X[i]))*(psi[itemID[i,1,3,k],candidatepositionID[i,1,3,k]] - psi[itemID[i,1,3,k],respondentpositionID[i,1,3,k]]))
                        );
            mu[i,k,2] = -1*(
                        fabs(exp(dot_product(beta[itemID[i,1,1,k]],X[i]))*(psi[itemID[i,2,1,k],candidatepositionID[i,2,1,k]] - psi[itemID[i,2,1,k],respondentpositionID[i,2,1,k]])) + 
                        fabs(exp(dot_product(beta[itemID[i,1,2,k]],X[i]))*(psi[itemID[i,2,2,k],candidatepositionID[i,2,2,k]] - psi[itemID[i,2,2,k],respondentpositionID[i,2,2,k]])) + 
                        fabs(exp(dot_product(beta[itemID[i,1,3,k]],X[i]))*(psi[itemID[i,2,3,k],candidatepositionID[i,2,3,k]] - psi[itemID[i,2,3,k],respondentpositionID[i,2,3,k]]))
                        );
            delta[i,k] = mu[i,k,2] - mu[i,k,1];
        }
    }

}

model {

    for (i in 1:N_respondents){
        for (k in 1:N_rounds){
            target += W[i] * bernoulli_logit_lpmf(Y[i, k] | delta[i, k] - gamma[D[i, k]]);
        }
    }

    // population weighted quasi-likelihood for ordinal responses for computing population proportions
    for (i in 1:N_respondents){
        for (k in 1:N_items){
            if (ordinal_responses[i,k] != 0) target += W[i] * categorical_lpmf(ordinal_responses[i,k]|pi[k]);
        }
    }

    for (k in 1:N_items) for (l in 1:N_covariates) beta[k,l] ~ normal(0.0,1.0);

}

generated quantities {

}
'





options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
Sys.setenv(OPENBLAS_NUM_THREADS = 1)

posterior <- stan(
  model_code = conjoint_code, 
  data = conjoint_scaling_data,
  #init = geninits_replication,
  #seed = seed_replication(),
  pars=c(
    "psi","gamma","beta","pi"
  ), 
  iter = 2000,
  warmup = 500, 
  chains = 2,
  cores = 2,
  refresh = 1,
  control = list(max_treedepth = 15, adapt_delta = 0.99)
)  

save(posterior,conjoint_scaling_data,file=paste0("Posterior_envhigh_by-",useModel,".Rdata")) #adjust output name dependent on dataset used (envhigh, envlow, inchigh, inclow)



} # end loop over models
























